1.Import Data¶

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import calinski_harabasz_score
from scipy.cluster.hierarchy import dendrogram
from scipy.cluster import hierarchy
import scipy.cluster.hierarchy as shc
from kneed import KneeLocator
from sklearn.metrics import silhouette_samples, silhouette_score
# Ignore harmless warnings
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline
from statsmodels.stats.outliers_influence import variance_inflation_factor as ViF
from sklearn.neighbors import NearestCentroid
In [2]:
Produksi_Buah_2020_2022=pd.read_excel("Produksi Tanaman Buah-buahan 2020-2022 gabungan.xlsx")
Produksi_Buah_2020_2022=Produksi_Buah_2020_2022.set_index("Provinsi")
In [3]:
Produksi_Buah_2020_2022.head()
Out[3]:
Alpukat Belimbing Duku Durian Jambu Biji Jambu Air Jeruk Siam Jeruk Besar Mangga Manggis ... Sawo Markisa Sirsak Sukun Melon Semangka Blewah Apel Anggur Stroberi
Provinsi
ACEH 150887 4309 22393 130595 23115 14949 46396 30986 79170 8516.0 ... 13592 1084.0 3505 5122 1084 21116.0 NaN 301.0 NaN 143.0
SUMATERA UTARA 117901 19347 58761 313678 40725 48230 1183180 5245 127630 71314.0 ... 62409 837.0 9001 6314 5126 112944.0 1.0 NaN 6.0 254.0
SUMATERA BARAT 266593 2950 20591 521775 28045 29095 381107 1684 49960 221084.0 ... 36591 36500.0 27086 9692 5611 51088.0 20.0 47.0 4.0 438.0
RIAU 4579 6706 21868 75321 33291 28619 254484 4780 53924 30975.0 ... 15694 6.0 4623 9492 3683 64243.0 58.0 NaN 59.0 NaN
JAMBI 33191 3579 50626 64455 11639 11071 168150 868 18281 30624.0 ... 11771 84.0 6031 8945 2709 16897.0 52.0 NaN 12.0 21.0

5 rows × 26 columns

In [4]:
Produksi_Buah_2020_2022.describe()
Out[4]:
Alpukat Belimbing Duku Durian Jambu Biji Jambu Air Jeruk Siam Jeruk Besar Mangga Manggis ... Sawo Markisa Sirsak Sukun Melon Semangka Blewah Apel Anggur Stroberi
count 34.000000 34.000000 34.000000 34.000000 34.000000 34.000000 3.400000e+01 34.000000 3.400000e+01 33.000000 ... 34.000000 29.000000 34.000000 34.000000 34.000000 33.000000 22.000000 1.000000e+01 26.000000 20.00000
mean 63061.588235 11194.205882 21918.029412 119659.000000 37983.705882 18438.058824 2.219542e+05 11039.441176 2.659684e+05 29394.424242 ... 15407.500000 1838.620690 12622.235294 15528.176471 11353.441176 40678.151515 1502.636364 1.549668e+05 1445.769231 2355.30000
std 117636.774955 35054.048100 27213.153865 194454.880051 77259.897458 31456.862337 4.903489e+05 26544.667640 7.499428e+05 52298.182488 ... 22026.930314 6887.045896 23906.914817 25926.683755 34873.462228 71321.659603 4394.608109 4.895717e+05 6492.170449 8387.45517
min 120.000000 47.000000 27.000000 650.000000 66.000000 16.000000 6.490000e+02 45.000000 5.918000e+03 9.000000 ... 1.000000 1.000000 102.000000 19.000000 10.000000 394.000000 1.000000 3.000000e+00 1.000000 1.00000
25% 2824.250000 1142.750000 4376.750000 20012.500000 3964.500000 3117.250000 1.445975e+04 610.500000 1.842575e+04 2399.000000 ... 703.750000 8.000000 1256.250000 2347.000000 968.250000 4647.000000 10.250000 1.625000e+01 7.000000 23.75000
50% 10819.000000 2211.500000 14785.000000 55420.500000 11281.500000 5934.500000 5.845900e+04 1388.500000 4.531900e+04 6371.000000 ... 5211.000000 70.000000 3878.000000 5535.000000 1903.500000 16897.000000 35.000000 5.750000e+01 27.500000 224.00000
75% 56122.500000 4250.500000 22849.750000 113428.250000 31145.500000 18469.500000 1.712295e+05 4477.500000 9.367225e+04 28751.000000 ... 15168.500000 329.000000 8655.500000 9642.000000 4765.250000 47229.000000 168.750000 2.527500e+02 111.750000 452.50000
max 517574.000000 200209.000000 138448.000000 971027.000000 308606.000000 154531.000000 2.611658e+06 106417.000000 4.079350e+06 221084.000000 ... 78460.000000 36500.000000 98601.000000 116228.000000 188639.000000 368149.000000 19709.000000 1.548312e+06 33207.000000 37826.00000

8 rows × 26 columns

In [5]:
Produksi_Buah_2020_2022.info()
<class 'pandas.core.frame.DataFrame'>
Index: 34 entries, ACEH to PAPUA
Data columns (total 26 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Alpukat      34 non-null     int64  
 1   Belimbing    34 non-null     int64  
 2   Duku         34 non-null     int64  
 3   Durian       34 non-null     int64  
 4   Jambu Biji   34 non-null     int64  
 5   Jambu Air    34 non-null     int64  
 6   Jeruk Siam   34 non-null     int64  
 7   Jeruk Besar  34 non-null     int64  
 8   Mangga       34 non-null     int64  
 9   Manggis      33 non-null     float64
 10  Nangka       34 non-null     int64  
 11  Nanas        33 non-null     float64
 12  Pepaya       34 non-null     int64  
 13  Pisang       34 non-null     int64  
 14  Rambutan     34 non-null     int64  
 15  Salak        34 non-null     int64  
 16  Sawo         34 non-null     int64  
 17  Markisa      29 non-null     float64
 18  Sirsak       34 non-null     int64  
 19  Sukun        34 non-null     int64  
 20  Melon        34 non-null     int64  
 21  Semangka     33 non-null     float64
 22  Blewah       22 non-null     float64
 23  Apel         10 non-null     float64
 24  Anggur       26 non-null     float64
 25  Stroberi     20 non-null     float64
dtypes: float64(8), int64(18)
memory usage: 7.2+ KB

2.Explanatory Data Analysis (EDA)¶

In [6]:
sns.set(rc={"figure.figsize":(10, 6)})
box2020_2022 = sns.boxplot(data=Produksi_Buah_2020_2022,orient="h")
plt.xlabel("Berat Hasil Produksi Buah (dari jutaan ton)")
plt.ylabel("Jenis Hasil Produksi Buah")
plt.show(box2020_2022)

2.1 Data Cleaning¶

In [7]:
ProduksiBuah20_22=Produksi_Buah_2020_2022.drop(columns=['Markisa','Blewah']) 
#kedua variabel tidak mempunyai produksi diantara 34 provinsi pada tahun 2021 sehingga variabel tidak digunakan.
In [8]:
ProduksiBuah20_22= ProduksiBuah20_22.fillna(0)
In [9]:
ProduksiBuah20_22
Out[9]:
Alpukat Belimbing Duku Durian Jambu Biji Jambu Air Jeruk Siam Jeruk Besar Mangga Manggis ... Rambutan Salak Sawo Sirsak Sukun Melon Semangka Apel Anggur Stroberi
Provinsi
ACEH 150887 4309 22393 130595 23115 14949 46396 30986 79170 8516.0 ... 100626 3034 13592 3505 5122 1084 21116.0 301.0 0.0 143.0
SUMATERA UTARA 117901 19347 58761 313678 40725 48230 1183180 5245 127630 71314.0 ... 79792 859993 62409 9001 6314 5126 112944.0 0.0 6.0 254.0
SUMATERA BARAT 266593 2950 20591 521775 28045 29095 381107 1684 49960 221084.0 ... 87406 6150 36591 27086 9692 5611 51088.0 47.0 4.0 438.0
RIAU 4579 6706 21868 75321 33291 28619 254484 4780 53924 30975.0 ... 89349 12387 15694 4623 9492 3683 64243.0 0.0 59.0 0.0
JAMBI 33191 3579 50626 64455 11639 11071 168150 868 18281 30624.0 ... 20499 17355 11771 6031 8945 2709 16897.0 0.0 12.0 21.0
SUMATERA SELATAN 105419 4068 138448 117258 18863 30010 172256 1184 52521 5916.0 ... 40836 6044 21356 4299 8519 5271 45418.0 3.0 12.0 194.0
BENGKULU 22152 1089 2473 55868 5075 3337 69186 1370 22511 15944.0 ... 18884 10062 5576 1413 1742 1368 2819.0 48.0 37.0 159.0
LAMPUNG 63732 10199 29695 62604 47611 31368 180335 3489 62078 28036.0 ... 59123 26553 44535 8682 20771 2120 62769.0 0.0 90.0 1.0
KEP. BANGKA BELITUNG 4090 393 2103 12793 1745 4813 5472 279 5918 2399.0 ... 5960 2998 1795 440 1558 1023 10017.0 0.0 0.0 0.0
KEP. RIAU 480 337 893 11568 1496 1037 698 69 6122 564.0 ... 5060 20172 2717 1094 3385 277 5164.0 0.0 0.0 0.0
DKI JAKARTA 3423 7676 34 650 4554 6361 649 45 20108 0.0 ... 11060 143 686 226 2506 10 0.0 0.0 14.0 0.0
JAWA BARAT 336037 25248 23002 240136 228644 78419 152813 23314 1337834 158602.0 ... 436610 80852 78460 42522 75440 2505 29679.0 0.0 168.0 37826.0
JAWA TENGAH 279669 57908 45767 479012 308606 154531 84477 55588 1445394 28751.0 ... 358116 1426748 63432 63961 116228 81932 203744.0 67.0 389.0 2898.0
DI YOGYAKARTA 24737 2274 8730 24878 15835 11786 7639 534 90355 8044.0 ... 41298 166790 8678 8576 38525 43781 20572.0 0.0 57.0 45.0
JAWA TIMUR 517574 200209 52445 971027 292169 83538 2611658 106417 4079350 156703.0 ... 405513 551285 58148 98601 73982 188639 368149.0 1548312.0 1843.0 2482.0
BANTEN 5866 4548 9878 90105 17618 19643 2411 515 94778 22488.0 ... 92262 3830 9077 11994 36096 975 4788.0 0.0 18.0 23.0
BALI 11263 1596 5465 74130 10924 6065 861555 5526 161993 59340.0 ... 58417 128845 10313 1472 1186 803 53426.0 6.0 33207.0 1155.0
NUSA TENGGARA BARAT 33294 1996 2471 102770 44368 7947 19847 3325 435121 62272.0 ... 55385 262 41150 84638 1761 7502 63944.0 772.0 1312.0 496.0
NUSA TENGGARA TIMUR 70890 2149 27 9897 32179 5539 160158 6106 229802 73.0 ... 18758 8604 3253 11768 10532 1035 6703.0 108.0 42.0 352.0
KALIMANTAN BARAT 4548 4075 20404 74767 13412 8950 324928 1978 23131 17111.0 ... 37101 5498 13112 5430 5948 1169 16216.0 0.0 30.0 0.0
KALIMANTAN TENGAH 1844 3274 4014 18952 10389 6626 21798 1088 6871 1280.0 ... 31475 2880 4450 2590 4011 2470 40153.0 0.0 25.0 312.0
KALIMANTAN SELATAN 120 2388 20608 54851 5685 2545 387016 1293 22399 3589.0 ... 45627 2664 4846 4094 6805 2720 47229.0 0.0 1.0 0.0
KALIMANTAN TIMUR 2460 3342 17592 32710 5834 4380 31573 1407 12403 938.0 ... 31241 34227 6819 3068 4925 1457 14669.0 0.0 0.0 0.0
KALIMANTAN UTARA 261 661 13196 23194 1119 1986 15872 285 20784 281.0 ... 24914 13257 332 1204 2053 1687 4647.0 0.0 2.0 0.0
SULAWESI UTARA 17925 1304 21625 38768 8585 3403 3266 1299 48286 6371.0 ... 28672 37950 2 5702 3070 2413 2420.0 0.0 2.0 0.0
SULAWESI TENGAH 17891 763 15185 116981 2195 3044 10056 666 37718 5294.0 ... 28801 2535 574 1100 1163 6594 19115.0 0.0 128.0 3.0
SULAWESI SELATAN 24000 1523 67959 167739 36423 5804 30905 106276 335685 9524.0 ... 108485 34876 2064 5400 40360 2309 25453.0 0.0 10.0 279.0
SULAWESI TENGGARA 3275 1731 11905 48369 5650 4700 92361 3570 47350 3667.0 ... 37403 5473 757 3662 8024 772 3626.0 0.0 119.0 24.0
GORONTALO 121 47 1026 14641 66 16 62034 335 8747 9.0 ... 4100 30 1 102 19 142 394.0 0.0 0.0 0.0
SULAWESI BARAT 1148 294 23170 54973 936 807 54884 592 43288 3835.0 ... 18296 224 182 533 3308 72 589.0 0.0 0.0 0.0
MALUKU 4442 1551 8460 36085 3768 3596 47175 2595 18860 3192.0 ... 3549 6886 60 1933 9094 966 3067.0 4.0 0.0 0.0
MALUKU UTARA 2674 725 9582 15432 1684 1959 7184 692 14102 2971.0 ... 20983 6297 24 1135 2294 329 2840.0 0.0 1.0 0.0
PAPUA BARAT 10375 1785 14385 10675 27576 1865 13989 1661 16284 42.0 ... 13179 7575 1285 2645 4132 796 2316.0 0.0 0.0 1.0
PAPUA 1233 559 432 1749 1622 855 80932 280 14168 267.0 ... 2261 324 114 626 956 6667 16165.0 0.0 2.0 0.0

34 rows × 24 columns

In [10]:
sns.relplot(data=ProduksiBuah20_22)
Out[10]:
<seaborn.axisgrid.FacetGrid at 0x192996d8a90>
In [11]:
sns.heatmap(ProduksiBuah20_22.isnull(),yticklabels=False,cbar=False,cmap='viridis')
Out[11]:
<AxesSubplot:ylabel='Provinsi'>
In [12]:
sns.heatmap(ProduksiBuah20_22.corr())
Out[12]:
<AxesSubplot:>

2.2 Data Visualization¶

In [13]:
sns.set(rc={"figure.figsize":(10, 6)})
box20_22 = sns.boxplot(data=ProduksiBuah20_22,orient="h")
plt.xlabel("Berat Hasil Produksi Buah (dari jutaan ton)")
plt.ylabel("Jenis Hasil Produksi Buah")
plt.show(box20_22)
In [14]:
ProduksiBuah20_22_copy = ProduksiBuah20_22.copy()
In [15]:
pb = ProduksiBuah20_22.to_numpy()

2.3 Data Transformation¶

In [16]:
scaler = MinMaxScaler()
#scaler = StandardScaler()
In [17]:
ProduksiBuah20_22_scaled = scaler.fit_transform(ProduksiBuah20_22.to_numpy())
ProduksiBuah20_22_scaled = pd.DataFrame(ProduksiBuah20_22_scaled, columns=ProduksiBuah20_22.columns,index=ProduksiBuah20_22.index)
In [18]:
print("Scaled Dataset Using MinMaxScaler")
ProduksiBuah20_22_scaled
Scaled Dataset Using MinMaxScaler
Out[18]:
Alpukat Belimbing Duku Durian Jambu Biji Jambu Air Jeruk Siam Jeruk Besar Mangga Manggis ... Rambutan Salak Sawo Sirsak Sukun Melon Semangka Apel Anggur Stroberi
Provinsi
ACEH 0.291363 0.021293 0.161580 0.133912 0.074703 0.096644 0.017521 0.290875 0.017983 0.038519 ... 0.226465 0.002106 0.173224 0.034549 0.043912 0.005694 0.057357 0.000194 0.000000 0.003780
SUMATERA UTARA 0.227616 0.096422 0.424314 0.322584 0.131779 0.312034 0.452902 0.048885 0.029879 0.322565 ... 0.178499 0.602756 0.795422 0.090346 0.054170 0.027122 0.306789 0.000000 0.000181 0.006715
SUMATERA BARAT 0.514969 0.014503 0.148561 0.537034 0.090682 0.188195 0.145713 0.015408 0.010812 1.000000 ... 0.196029 0.004290 0.466358 0.273952 0.083238 0.029693 0.138770 0.000030 0.000120 0.011579
RIAU 0.008617 0.033268 0.157787 0.076951 0.107685 0.185115 0.097217 0.044514 0.011785 0.140105 ... 0.200502 0.008661 0.200015 0.045899 0.081517 0.019472 0.174503 0.000000 0.001777 0.000000
JAMBI 0.063911 0.017646 0.365544 0.065753 0.037509 0.071546 0.064152 0.007737 0.003035 0.138517 ... 0.041989 0.012143 0.150015 0.060194 0.076810 0.014309 0.045897 0.000000 0.000361 0.000555
SUMATERA SELATAN 0.203494 0.020089 1.000000 0.120168 0.060922 0.194117 0.065724 0.010708 0.011441 0.026759 ... 0.088811 0.004215 0.272180 0.042610 0.073144 0.027891 0.123369 0.000002 0.000361 0.005129
BENGKULU 0.042578 0.005206 0.017671 0.056904 0.016235 0.021493 0.026249 0.012456 0.004073 0.072117 ... 0.038271 0.007032 0.071056 0.013310 0.014827 0.007199 0.007657 0.000031 0.001114 0.004203
LAMPUNG 0.122933 0.050719 0.214332 0.063845 0.154097 0.202906 0.068819 0.032377 0.013787 0.126812 ... 0.130913 0.018590 0.567609 0.087107 0.178575 0.011186 0.170499 0.000000 0.002710 0.000026
KEP. BANGKA BELITUNG 0.007672 0.001729 0.014998 0.012514 0.005442 0.031046 0.001847 0.002200 0.000000 0.010851 ... 0.008516 0.002080 0.022865 0.003432 0.013243 0.005370 0.027209 0.000000 0.000000 0.000000
KEP. RIAU 0.000696 0.001449 0.006256 0.011251 0.004635 0.006608 0.000019 0.000226 0.000050 0.002551 ... 0.006444 0.014118 0.034617 0.010071 0.028965 0.001415 0.014027 0.000000 0.000000 0.000000
DKI JAKARTA 0.006383 0.038114 0.000051 0.000000 0.014546 0.041064 0.000000 0.000000 0.003484 0.000000 ... 0.020258 0.000079 0.008731 0.001259 0.021401 0.000000 0.000000 0.000000 0.000422 0.000000
JAWA BARAT 0.649173 0.125903 0.165979 0.246797 0.740837 0.507414 0.058278 0.218751 0.326976 0.717383 ... 1.000000 0.056649 1.000000 0.430664 0.649012 0.013227 0.080617 0.000000 0.005059 1.000000
JAWA TENGAH 0.540239 0.289071 0.330441 0.492965 1.000000 1.000000 0.032106 0.522158 0.353382 0.130046 ... 0.819284 1.000000 0.808460 0.648321 1.000000 0.434302 0.553428 0.000043 0.011714 0.076614
DI YOGYAKARTA 0.047573 0.011126 0.062873 0.024968 0.051108 0.076174 0.002677 0.004597 0.020729 0.036384 ... 0.089875 0.116884 0.110593 0.086031 0.331351 0.232048 0.055880 0.000000 0.001717 0.001190
JAWA TIMUR 1.000000 1.000000 0.378685 1.000000 0.946727 0.540543 1.000000 1.000000 1.000000 0.708794 ... 0.928405 0.386380 0.741113 1.000000 0.636465 1.000000 1.000000 1.000000 0.055500 0.065616
BANTEN 0.011104 0.022487 0.071167 0.092186 0.056887 0.127023 0.000675 0.004418 0.021815 0.101717 ... 0.207209 0.002663 0.115678 0.120732 0.310449 0.005116 0.013006 0.000000 0.000542 0.000608
BALI 0.021534 0.007739 0.039286 0.075723 0.035192 0.039148 0.329722 0.051527 0.038315 0.268405 ... 0.129288 0.090288 0.131432 0.013909 0.010042 0.004204 0.145121 0.000004 1.000000 0.030535
NUSA TENGGARA BARAT 0.064110 0.009737 0.017656 0.105237 0.143586 0.051328 0.007353 0.030835 0.105366 0.281667 ... 0.122307 0.000163 0.524465 0.858242 0.014990 0.039718 0.173691 0.000499 0.039510 0.013113
NUSA TENGGARA TIMUR 0.136766 0.010501 0.000000 0.009529 0.104081 0.035744 0.061091 0.056979 0.054962 0.000330 ... 0.037981 0.006010 0.041448 0.118438 0.090466 0.005434 0.018207 0.000070 0.001265 0.009306
KALIMANTAN BARAT 0.008557 0.020124 0.147210 0.076380 0.043255 0.057820 0.124197 0.018172 0.004226 0.077396 ... 0.080212 0.003833 0.167106 0.054092 0.051020 0.006144 0.044047 0.000000 0.000903 0.000000
KALIMANTAN TENGAH 0.003332 0.016122 0.028803 0.018861 0.033458 0.042779 0.008100 0.009805 0.000234 0.005790 ... 0.067259 0.001998 0.056705 0.025259 0.034352 0.013041 0.109067 0.000000 0.000753 0.008248
KALIMANTAN SELATAN 0.000000 0.011696 0.148684 0.055856 0.018212 0.016367 0.147976 0.011732 0.004046 0.016234 ... 0.099841 0.001846 0.061752 0.040528 0.058395 0.014367 0.128288 0.000000 0.000030 0.000000
KALIMANTAN TIMUR 0.004522 0.016462 0.126895 0.033039 0.018694 0.028243 0.011844 0.012804 0.001592 0.004243 ... 0.066721 0.023969 0.086899 0.030112 0.042217 0.007671 0.039845 0.000000 0.000000 0.000000
KALIMANTAN UTARA 0.000272 0.003068 0.095137 0.023232 0.003413 0.012750 0.005830 0.002256 0.003650 0.001271 ... 0.052154 0.009271 0.004219 0.011188 0.017503 0.008890 0.012623 0.000000 0.000060 0.000000
SULAWESI UTARA 0.034409 0.006280 0.156031 0.039282 0.027611 0.021920 0.001002 0.011789 0.010401 0.028817 ... 0.060806 0.026578 0.000013 0.056853 0.026254 0.012739 0.006573 0.000000 0.000060 0.000000
SULAWESI TENGAH 0.034343 0.003577 0.109507 0.119882 0.006900 0.019597 0.003603 0.005838 0.007807 0.023946 ... 0.061103 0.001756 0.007303 0.010132 0.009844 0.034904 0.051922 0.000000 0.003855 0.000079
SULAWESI SELATAN 0.046149 0.007374 0.490764 0.172190 0.117836 0.037459 0.011588 0.998674 0.080956 0.043079 ... 0.244559 0.024424 0.026294 0.053787 0.347142 0.012188 0.069138 0.000000 0.000301 0.007376
SULAWESI TENGGARA 0.006097 0.008413 0.085811 0.049176 0.018098 0.030314 0.035125 0.033138 0.010171 0.016586 ... 0.080907 0.003815 0.009636 0.036142 0.068885 0.004040 0.009849 0.000000 0.003584 0.000634
GORONTALO 0.000002 0.000000 0.007217 0.014418 0.000000 0.000000 0.023510 0.002726 0.000695 0.000041 ... 0.004234 0.000000 0.000000 0.000000 0.000000 0.000700 0.001070 0.000000 0.000000 0.000000
SULAWESI BARAT 0.001987 0.001234 0.167193 0.055981 0.002820 0.005119 0.020772 0.005142 0.009174 0.017346 ... 0.036917 0.000136 0.002307 0.004376 0.028302 0.000329 0.001600 0.000000 0.000000 0.000000
MALUKU 0.008352 0.007514 0.060923 0.036517 0.011998 0.023169 0.017819 0.023972 0.003177 0.014438 ... 0.002965 0.004805 0.000752 0.018589 0.078092 0.005068 0.008331 0.000003 0.000000 0.000000
MALUKU UTARA 0.004936 0.003387 0.069029 0.015233 0.005244 0.012575 0.002503 0.006082 0.002009 0.013438 ... 0.043104 0.004393 0.000293 0.010487 0.019577 0.001691 0.007714 0.000000 0.000030 0.000000
PAPUA BARAT 0.019818 0.008683 0.103727 0.010331 0.089162 0.011966 0.005109 0.015192 0.002545 0.000190 ... 0.025136 0.005288 0.016365 0.025818 0.035393 0.004167 0.006291 0.000000 0.000000 0.000026
PAPUA 0.002151 0.002558 0.002926 0.001133 0.005043 0.005430 0.030748 0.002209 0.002025 0.001208 ... 0.000000 0.000206 0.001440 0.005320 0.008063 0.035291 0.043909 0.000000 0.000060 0.000000

34 rows × 24 columns

In [19]:
sns.relplot(data=ProduksiBuah20_22_scaled)
Out[19]:
<seaborn.axisgrid.FacetGrid at 0x192991f66d0>
In [20]:
sns.set(rc={"figure.figsize":(10, 6)})
box20_22 = sns.boxplot(data=ProduksiBuah20_22_scaled,orient="h")
plt.xlabel("Berat Hasil Produksi Buah (dari jutaan ton)")
plt.ylabel("Jenis Hasil Produksi Buah")
plt.show(box20_22)

2.4 VIF¶

In [21]:
def Calc_VIF(Data):
    #Calculating VIF
    VIF = pd.DataFrame()
    VIF['Variables'] = Data.columns
    VIF['VIF'] = [ViF(Data.values, i) for i in range(Data.shape[1])]
    return VIF
In [22]:
Calc_VIF(ProduksiBuah20_22_scaled)
Out[22]:
Variables VIF
0 Alpukat 231.974512
1 Belimbing 1400.773457
2 Duku 13.171865
3 Durian 257.046865
4 Jambu Biji 359.296810
5 Jambu Air 1813.794145
6 Jeruk Siam 150.844569
7 Jeruk Besar 111.155240
8 Mangga 4794.463991
9 Manggis 106.549009
10 Nangka 186.399033
11 Nanas 155.402171
12 Pepaya 56.890755
13 Pisang 411.241944
14 Rambutan 309.253186
15 Salak 219.163682
16 Sawo 566.568867
17 Sirsak 442.691592
18 Sukun 342.950252
19 Melon 334.925102
20 Semangka 205.517478
21 Apel 2721.030432
22 Anggur 13.983284
23 Stroberi 428.268248
In [23]:
ProduksiBuah20_22_scaled_cluster = ProduksiBuah20_22_scaled.copy()
psc = ProduksiBuah20_22_scaled.to_numpy()

3. PCA¶

In [24]:
pca = PCA(n_components=3)
In [25]:
principalComponents = pca.fit_transform(ProduksiBuah20_22_scaled_cluster)
In [26]:
print('Explained variation per principal component: {}'.format(pca.explained_variance_ratio_))
Explained variation per principal component: [0.65229876 0.086718   0.06117068]
In [27]:
principalDf = pd.DataFrame(data = principalComponents
             , columns = ['principal component 1', 'principal component 2','principal component 3'],index = ProduksiBuah20_22_scaled.index)
#,'principal component 4','principal component 5','principal component 6','principal component 7','principal component 8','principal component 9','principal component 10'
In [28]:
principalDf
Out[28]:
principal component 1 principal component 2 principal component 3
Provinsi
ACEH -0.092806 0.047337 0.024086
SUMATERA UTARA 0.472903 -0.261143 -0.021415
SUMATERA BARAT 0.384959 -0.152729 -0.776127
RIAU -0.083750 -0.117560 0.058292
JAMBI -0.239929 -0.039618 0.026891
SUMATERA SELATAN 0.007483 -0.239554 0.309540
BENGKULU -0.411179 0.058462 -0.062127
LAMPUNG 0.349867 -0.437603 0.146488
KEP. BANGKA BELITUNG -0.482492 0.072243 -0.000884
KEP. RIAU -0.487729 0.066953 0.001912
DKI JAKARTA -0.489489 0.075747 0.004065
JAWA BARAT 1.596476 -1.023694 -0.552769
JAWA TENGAH 2.174177 -0.537051 0.835334
DI YOGYAKARTA -0.193676 0.018822 0.118851
JAWA TIMUR 3.453580 1.080844 -0.173608
BANTEN -0.182623 -0.058624 0.004994
BALI -0.156027 0.187157 -0.351779
NUSA TENGGARA BARAT 0.220100 -0.113325 -0.286556
NUSA TENGGARA TIMUR -0.192434 0.107810 -0.000499
KALIMANTAN BARAT -0.261015 -0.015714 -0.004639
KALIMANTAN TENGAH -0.390270 0.060895 0.019443
KALIMANTAN SELATAN -0.319736 0.101512 0.022756
KALIMANTAN TIMUR -0.355860 0.032815 0.048697
KALIMANTAN UTARA -0.427224 0.083359 0.032041
SULAWESI UTARA -0.397065 0.071251 0.027039
SULAWESI TENGAH -0.409109 0.090917 0.005913
SULAWESI SELATAN 0.106292 0.246762 0.437292
SULAWESI TENGGARA -0.393085 0.076199 0.025919
GORONTALO -0.513739 0.096608 -0.010166
SULAWESI BARAT -0.450433 0.080170 0.024656
MALUKU -0.443985 0.086514 0.024672
MALUKU UTARA -0.479259 0.074951 0.009942
PAPUA BARAT -0.420570 0.067151 0.035231
PAPUA -0.492354 0.112136 -0.003484
In [29]:
plt.figure(figsize=(15,7))
plt.scatter(principalComponents[:,0],principalComponents[:,1],s = 50,cmap='rainbow')
plt.xlabel('First principal component')
plt.ylabel('Second Principal Component')
Out[29]:
Text(0, 0.5, 'Second Principal Component')
In [30]:
print('Explained variation per principal component: {}'.format(pca.explained_variance_ratio_))
Explained variation per principal component: [0.65229876 0.086718   0.06117068]
In [31]:
evc = pca.explained_variance_ratio_.cumsum()
evc
Out[31]:
array([0.65229876, 0.73901675, 0.80018744])
In [32]:
plt.ylabel('% Variance Explained')
plt.xlabel('# of Features')
plt.title('PCA Analysis')
plt.style.context('seaborn-whitegrid')
#plt.axhline(y = 0.8, color = 'r', linestyle = '-')

plt.plot(evc)
Out[32]:
[<matplotlib.lines.Line2D at 0x1929a4d3550>]
In [33]:
print(pca.singular_values_)
[4.82349523 1.75870583 1.47710115]
In [34]:
loadings = pd.DataFrame(pca.components_.T, columns=['PC1', 'PC2','PC3'],index=['Alpukat (Ton)','Belimbing (Ton)','Duku (Ton)','Durian (Ton)','Jambu Biji (Ton)','Jambu Air (Ton)','Jeruk Siam (Ton)','Jeruk Besar (Ton)','Mangga (Ton)','Manggis (Ton)','Nangka (Ton)','Nenas (Ton)','Pepaya (Ton)','Pisang (Ton)','Rambutan (Ton)','Salak (Ton)','Sawo (Ton)','Sirsak (Ton)','Sukun (Ton)','Melon (Ton)','Semangka (Ton)','Apel (Ton)','Anggur (Ton)','Stroberi (Ton)'])
loadings
#,'PC4','PC5','PC6','PC7','PC8','PC9','PC10'
Out[34]:
PC1 PC2 PC3
Alpukat (Ton) 0.252451 -0.022119 -0.181361
Belimbing (Ton) 0.183705 0.242558 0.001300
Duku (Ton) 0.086611 -0.070551 0.270380
Durian (Ton) 0.216049 0.135159 -0.109172
Jambu Biji (Ton) 0.284728 -0.118264 0.114043
Jambu Air (Ton) 0.211105 -0.228374 0.190181
Jeruk Siam (Ton) 0.157324 0.292070 -0.170702
Jeruk Besar (Ton) 0.213581 0.266015 0.256476
Mangga (Ton) 0.203846 0.181947 -0.030309
Manggis (Ton) 0.184538 -0.102790 -0.599328
Nangka (Ton) 0.256665 0.015693 0.125663
Nenas (Ton) 0.125161 -0.270899 0.192270
Pepaya (Ton) 0.221634 0.077304 -0.010064
Pisang (Ton) 0.222451 0.029697 -0.068860
Rambutan (Ton) 0.276040 -0.167025 -0.032217
Salak (Ton) 0.163654 -0.101400 0.330312
Sawo (Ton) 0.277351 -0.412929 -0.167949
Sirsak (Ton) 0.245180 0.038517 -0.120927
Sukun (Ton) 0.222552 -0.169971 0.253015
Melon (Ton) 0.186388 0.266910 0.087919
Semangka (Ton) 0.207232 0.174681 0.068631
Apel (Ton) 0.148446 0.349422 -0.079629
Anggur (Ton) 0.003172 0.074561 -0.167209
Stroberi (Ton) 0.085463 -0.320293 -0.237788
In [35]:
pc1_loadings = loadings.sort_values(by='PC1', ascending=False)[['PC1']]
pc1_loadings = pc1_loadings.reset_index()
pc1_loadings.columns = ['Attribute', 'CorrelationWithPC1']

plt.bar(x=pc1_loadings['Attribute'], height=pc1_loadings['CorrelationWithPC1'], color='#087E8B')
plt.title('PCA loading scores (first principal component)', size=20)
plt.xticks(rotation='vertical')
plt.show()

4. K-Means¶

In [36]:
wcss = []
for i in range (1,11):
    kmeans_pca = KMeans(i)
    kmeans_pca.fit(principalComponents)
    wcss.append(kmeans_pca.inertia_)
In [37]:
wcss
Out[37]:
[28.54098019819429,
 9.372392587904084,
 5.3572564298669425,
 3.2111782243268263,
 1.962483422058978,
 1.3226630825115648,
 0.9173946451958412,
 0.5985585401544481,
 0.45682420658711,
 0.3127598148093504]
In [38]:
y=wcss
x=range(1, len(y)+1)
kn = KneeLocator(x, y, curve='convex', direction='decreasing')
print(kn.knee)
3
In [39]:
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('Within-cluster Sum of Squares')
plt.plot(x, y, 'bx-')
plt.vlines(kn.knee, plt.ylim()[0], plt.ylim()[1], linestyles='dashed')
Out[39]:
<matplotlib.collections.LineCollection at 0x1929a61bfd0>
k=3¶
In [40]:
kmeans3_i = KMeans(n_clusters=3, init='k-means++',n_init=10,max_iter=300,random_state=42)
kmeans3_i.fit(principalDf)
identified_clusters_Kmeans3_i = kmeans3_i.fit_predict(principalDf)
identified_clusters_Kmeans3_i
Out[40]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
In [41]:
kmeans3_i.feature_names_in_
Out[41]:
array(['principal component 1', 'principal component 2',
       'principal component 3'], dtype=object)
In [42]:
kmeans3_r = KMeans(n_clusters=3, init='random',n_init=10,max_iter=300,random_state=42)
kmeans3_r.fit(principalDf)
identified_clusters_Kmeans3_r = kmeans3_r.fit_predict(principalDf)
identified_clusters_Kmeans3_r
Out[42]:
array([2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 1, 1, 2, 1, 2, 2, 0, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
k=4¶
In [43]:
kmeans4 = KMeans(n_clusters=4, init='k-means++',n_init=10,max_iter=300,random_state=42)
kmeans4.fit(principalDf)
identified_clusters_Kmeans4 = kmeans4.fit_predict(principalDf)
identified_clusters_Kmeans4
Out[43]:
array([0, 3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 1, 1, 0, 2, 0, 0, 3, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
In [44]:
kmeans4_r = KMeans(n_clusters=4, init='random',n_init=10,max_iter=300,random_state=42)
kmeans4_r.fit(principalDf)
identified_clusters_Kmeans4_r = kmeans4_r.fit_predict(principalDf)
identified_clusters_Kmeans4_r
Out[44]:
array([2, 1, 1, 2, 2, 2, 0, 1, 0, 0, 0, 1, 3, 2, 3, 2, 2, 1, 2, 2, 0, 0,
       0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0])
k=5¶
In [45]:
kmeans5 = KMeans(n_clusters=5, init='k-means++',n_init=10,max_iter=300,random_state=42)
kmeans5.fit(principalDf)
identified_clusters_Kmeans5 = kmeans5.fit_predict(principalDf)
identified_clusters_Kmeans5
Out[45]:
array([2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 4, 1, 2, 3, 2, 2, 0, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
In [46]:
kmeans5_r = KMeans(n_clusters=5, init='random',n_init=10,max_iter=300,random_state=42)
kmeans5_r.fit(principalDf)
identified_clusters_Kmeans5_r = kmeans5_r.fit_predict(principalDf)
identified_clusters_Kmeans5_r
Out[46]:
array([2, 1, 1, 2, 2, 2, 0, 1, 0, 0, 0, 4, 4, 2, 3, 2, 2, 1, 2, 2, 0, 0,
       0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0])
In [47]:
plt.scatter(principalComponents[:, 0], principalComponents[:, 1], c= identified_clusters_Kmeans3_i, s=100, cmap='rainbow')
plt.title("Optimal Number of Clusters")

centers = kmeans3_i.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5)
plt.xlabel('First principal component')
plt.ylabel('Second Principal Component')
plt.show()
In [48]:
plt.scatter(principalComponents[:, 0], principalComponents[:, 1], c= identified_clusters_Kmeans3_r, s=100, cmap='rainbow')
plt.title("Optimal Number of Clusters")

centers = kmeans3_r.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5)
plt.xlabel('First principal component')
plt.ylabel('Second Principal Component')
plt.show()
In [49]:
Kmeans_Tabel = pd.DataFrame(data =identified_clusters_Kmeans3_i, columns=['k=3 kmeans++'], index = ProduksiBuah20_22_scaled_cluster.index)
In [50]:
Kmeans_Tabel['k=4 kmeans++'] = identified_clusters_Kmeans4
Kmeans_Tabel['k=5 kmeans++'] = identified_clusters_Kmeans5
Kmeans_Tabel['k=3 random'] = identified_clusters_Kmeans3_r
Kmeans_Tabel['k=4 random'] = identified_clusters_Kmeans4_r
Kmeans_Tabel['k=5 random'] = identified_clusters_Kmeans5_r
In [51]:
Kmeans_Tabel 
Out[51]:
k=3 kmeans++ k=4 kmeans++ k=5 kmeans++ k=3 random k=4 random k=5 random
Provinsi
ACEH 0 0 2 2 2 2
SUMATERA UTARA 0 3 0 0 1 1
SUMATERA BARAT 0 3 0 0 1 1
RIAU 0 0 2 2 2 2
JAMBI 0 0 2 2 2 2
SUMATERA SELATAN 0 0 2 2 2 2
BENGKULU 0 0 2 2 0 0
LAMPUNG 0 3 0 0 1 1
KEP. BANGKA BELITUNG 0 0 2 2 0 0
KEP. RIAU 0 0 2 2 0 0
DKI JAKARTA 0 0 2 2 0 0
JAWA BARAT 1 1 4 1 1 4
JAWA TENGAH 1 1 1 1 3 4
DI YOGYAKARTA 0 0 2 2 2 2
JAWA TIMUR 2 2 3 1 3 3
BANTEN 0 0 2 2 2 2
BALI 0 0 2 2 2 2
NUSA TENGGARA BARAT 0 3 0 0 1 1
NUSA TENGGARA TIMUR 0 0 2 2 2 2
KALIMANTAN BARAT 0 0 2 2 2 2
KALIMANTAN TENGAH 0 0 2 2 0 0
KALIMANTAN SELATAN 0 0 2 2 0 0
KALIMANTAN TIMUR 0 0 2 2 0 0
KALIMANTAN UTARA 0 0 2 2 0 0
SULAWESI UTARA 0 0 2 2 0 0
SULAWESI TENGAH 0 0 2 2 0 0
SULAWESI SELATAN 0 0 2 2 2 2
SULAWESI TENGGARA 0 0 2 2 0 0
GORONTALO 0 0 2 2 0 0
SULAWESI BARAT 0 0 2 2 0 0
MALUKU 0 0 2 2 0 0
MALUKU UTARA 0 0 2 2 0 0
PAPUA BARAT 0 0 2 2 0 0
PAPUA 0 0 2 2 0 0

5. Single Linkage¶

In [52]:
cluster3 = AgglomerativeClustering(n_clusters=3, affinity='euclidean', linkage='single')
cluster3 =cluster3.fit_predict(principalComponents)
cluster3 
Out[52]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)
In [53]:
cluster4 = AgglomerativeClustering(n_clusters=4, affinity='euclidean', linkage='single')
cluster4 =cluster4.fit_predict(principalComponents)
cluster4
Out[53]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)
In [54]:
cluster5 = AgglomerativeClustering(n_clusters=5, affinity='euclidean', linkage='single')
cluster5 =cluster5.fit_predict(principalComponents)
cluster5 
Out[54]:
array([0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)
In [55]:
plt.figure(figsize=(15, 7))
plt.title("Dendrogram")
dend = dendrogram(hierarchy.linkage(principalComponents, method='single'))
plt.axhline(y=1, color='black', linestyle='--')
plt.show()
In [56]:
SingleLinkage_Tabel = pd.DataFrame(data =cluster3, columns=['k=3'], index = ProduksiBuah20_22_scaled_cluster.index)
In [57]:
SingleLinkage_Tabel['k=4'] = cluster4
SingleLinkage_Tabel['k=5'] = cluster5
In [58]:
SingleLinkage_Tabel
Out[58]:
k=3 k=4 k=5
Provinsi
ACEH 0 0 0
SUMATERA UTARA 0 0 0
SUMATERA BARAT 0 0 4
RIAU 0 0 0
JAMBI 0 0 0
SUMATERA SELATAN 0 0 0
BENGKULU 0 0 0
LAMPUNG 0 0 0
KEP. BANGKA BELITUNG 0 0 0
KEP. RIAU 0 0 0
DKI JAKARTA 0 0 0
JAWA BARAT 0 3 3
JAWA TENGAH 2 2 2
DI YOGYAKARTA 0 0 0
JAWA TIMUR 1 1 1
BANTEN 0 0 0
BALI 0 0 0
NUSA TENGGARA BARAT 0 0 0
NUSA TENGGARA TIMUR 0 0 0
KALIMANTAN BARAT 0 0 0
KALIMANTAN TENGAH 0 0 0
KALIMANTAN SELATAN 0 0 0
KALIMANTAN TIMUR 0 0 0
KALIMANTAN UTARA 0 0 0
SULAWESI UTARA 0 0 0
SULAWESI TENGAH 0 0 0
SULAWESI SELATAN 0 0 0
SULAWESI TENGGARA 0 0 0
GORONTALO 0 0 0
SULAWESI BARAT 0 0 0
MALUKU 0 0 0
MALUKU UTARA 0 0 0
PAPUA BARAT 0 0 0
PAPUA 0 0 0

Combination K-Means & Single Linkage¶

In [59]:
clf3 = NearestCentroid()
clf3.fit(principalComponents, cluster3 )
slc3=clf3.centroids_
print(slc3)
[[-0.17586741 -0.01699351 -0.02067896]
 [ 3.45358047  1.08084392 -0.17360785]
 [ 2.17417666 -0.5370515   0.83533448]]
In [60]:
clf4 = NearestCentroid()
clf4.fit(principalComponents, cluster4 )
slc4=clf4.centroids_
print(slc4)
[[-0.23303979  0.0154807  -0.00351477]
 [ 3.45358047  1.08084392 -0.17360785]
 [ 2.17417666 -0.5370515   0.83533448]
 [ 1.59647623 -1.02369398 -0.55276887]]
In [61]:
clf5 = NearestCentroid()
clf5.fit(principalComponents, cluster5 )
slc5=clf5.centroids_
print(slc5)
[[-0.25363976  0.02108769  0.02223899]
 [ 3.45358047  1.08084392 -0.17360785]
 [ 2.17417666 -0.5370515   0.83533448]
 [ 1.59647623 -1.02369398 -0.55276887]
 [ 0.38495947 -0.15272904 -0.77612743]]
In [62]:
slc3_ = np.array(slc3)
slc4_ = np.array(slc4)
slc5_ = np.array(slc5)
In [63]:
hkmeans3 = KMeans(n_clusters=3, init=slc3_,n_init=10,max_iter=300,random_state=42)
hkmeans3.fit(principalComponents)
identified_clusters_hKmeans3 = hkmeans3.fit_predict(principalComponents)
identified_clusters_hKmeans3
Out[63]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
In [64]:
hkmeans4 = KMeans(n_clusters=4, init=slc4_,n_init=10,max_iter=300,random_state=42)
hkmeans4.fit(principalComponents)
identified_clusters_hKmeans4 = hkmeans4.fit_predict(principalComponents)
identified_clusters_hKmeans4
Out[64]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
In [65]:
hkmeans5 = KMeans(n_clusters=5, init=slc5_,n_init=10,max_iter=300,random_state=42)
hkmeans5.fit(principalComponents)
identified_clusters_hKmeans5 = hkmeans5.fit_predict(principalComponents)
identified_clusters_hKmeans5
Out[65]:
array([0, 4, 4, 0, 0, 0, 0, 4, 0, 0, 0, 3, 2, 0, 1, 0, 0, 4, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
In [66]:
Hierarchical_Kmeans_Tabel = pd.DataFrame(data =identified_clusters_hKmeans3, columns=['k=3 hk'], index = ProduksiBuah20_22_scaled_cluster.index)
In [67]:
Hierarchical_Kmeans_Tabel['k=4 hk'] = identified_clusters_hKmeans4
Hierarchical_Kmeans_Tabel['k=5 hk'] = identified_clusters_hKmeans5
In [68]:
Hierarchical_Kmeans_Tabel
Out[68]:
k=3 hk k=4 hk k=5 hk
Provinsi
ACEH 0 0 0
SUMATERA UTARA 0 0 4
SUMATERA BARAT 0 0 4
RIAU 0 0 0
JAMBI 0 0 0
SUMATERA SELATAN 0 0 0
BENGKULU 0 0 0
LAMPUNG 0 0 4
KEP. BANGKA BELITUNG 0 0 0
KEP. RIAU 0 0 0
DKI JAKARTA 0 0 0
JAWA BARAT 2 3 3
JAWA TENGAH 2 2 2
DI YOGYAKARTA 0 0 0
JAWA TIMUR 1 1 1
BANTEN 0 0 0
BALI 0 0 0
NUSA TENGGARA BARAT 0 0 4
NUSA TENGGARA TIMUR 0 0 0
KALIMANTAN BARAT 0 0 0
KALIMANTAN TENGAH 0 0 0
KALIMANTAN SELATAN 0 0 0
KALIMANTAN TIMUR 0 0 0
KALIMANTAN UTARA 0 0 0
SULAWESI UTARA 0 0 0
SULAWESI TENGAH 0 0 0
SULAWESI SELATAN 0 0 0
SULAWESI TENGGARA 0 0 0
GORONTALO 0 0 0
SULAWESI BARAT 0 0 0
MALUKU 0 0 0
MALUKU UTARA 0 0 0
PAPUA BARAT 0 0 0
PAPUA 0 0 0
In [69]:
ProduksiBuah20_22_copy = pd.concat([ProduksiBuah20_22_copy, Kmeans_Tabel,SingleLinkage_Tabel,Hierarchical_Kmeans_Tabel ], axis=1)
ProduksiBuah20_22_scaled_cluster = pd.concat([ProduksiBuah20_22_scaled_cluster, Kmeans_Tabel,SingleLinkage_Tabel,Hierarchical_Kmeans_Tabel  ], axis=1)
In [70]:
kmeans3 = KMeans(n_clusters=3, init='k-means++',n_init=10,max_iter=300,random_state=42)
kmeans3.fit(ProduksiBuah20_22_scaled)
kmeans4 = KMeans(n_clusters=4, init='k-means++',n_init=10,max_iter=300,random_state=42)
kmeans4.fit(ProduksiBuah20_22_scaled)
kmeans5 = KMeans(n_clusters=5, init='k-means++',n_init=10,max_iter=300,random_state=42)
kmeans5.fit(ProduksiBuah20_22_scaled)
Out[70]:
KMeans(n_clusters=5, random_state=42)

6. Evaluasi cluster¶

6.1 Calinski Harabasz Index¶

In [117]:
resultsk = {}

for i in range(3,6):
    kmeansk = KMeans(n_clusters=i, init='k-means++',n_init=10,max_iter=300,random_state=42)
    labelsk = kmeansk.fit_predict(principalComponents)
    db_indexk = calinski_harabasz_score(psc, labelsk)
    resultsk.update({i: db_indexk})

resultsr = {}

for i in range(3,6):
    kmeansr = KMeans(n_clusters=i, init='random',n_init=10,max_iter=300,random_state=42)
    labelsr = kmeansr.fit_predict(principalComponents)
    db_indexr = calinski_harabasz_score(psc, labelsr)
    resultsr.update({i: db_indexr})

resultshk = {}

for i in range(1,2):
    kmeanshk3_ = KMeans(n_clusters=3, init=slc3_,n_init=10,max_iter=300,random_state=42)
    labelshk3_ = kmeanshk3_.fit_predict(principalComponents)
    db_indexhk3_ = calinski_harabasz_score(psc, labelshk3_)
    resultshk.update({3: db_indexhk3_})
    kmeanshk4_ = KMeans(n_clusters=4, init=slc4_,n_init=10,max_iter=300,random_state=42)
    labelshk4_ = kmeanshk4_.fit_predict(principalComponents)
    db_indexhk4_ = calinski_harabasz_score(psc, labelshk4_)
    resultshk.update({4: db_indexhk4_})
    kmeanshk5_ = KMeans(n_clusters=5, init=slc5_,n_init=10,max_iter=300,random_state=42)
    labelshk5_ = kmeanshk5_.fit_predict(principalComponents)
    db_indexhk5_ = calinski_harabasz_score(psc, labelshk5_)
    resultshk.update({5: db_indexhk5_})

resultssl = {}

for i in range(3,6):
    sl = AgglomerativeClustering(n_clusters=i, affinity='euclidean', linkage='single')
    labelssl = sl.fit_predict(principalComponents)
    db_indexsl = calinski_harabasz_score(psc, labelssl)
    resultssl.update({i: db_indexsl})
In [118]:
sns.set(rc={"figure.figsize":(10, 6)})
plt.plot(list(resultsk.keys()), list(resultsk.values()), label='K-Means++')
plt.plot(list(resultsr.keys()), list(resultsr.values()), label='Random')
plt.plot(list(resultshk.keys()), list(resultshk.values()), label='Hierarchical K-Means')
plt.plot(list(resultssl.keys()), list(resultssl.values()), label='Single Linkage')
plt.title('Calinski Harabasz Score 2020-2022')
plt.xlabel("Number of clusters")
plt.ylabel("Calinski-Harabasz Index")
plt.legend()
plt.show()
In [73]:
ch_index_kmeans3_i = calinski_harabasz_score(psc, Kmeans_Tabel['k=3 kmeans++'])
ch_index_kmeans4_i = calinski_harabasz_score(psc, Kmeans_Tabel['k=4 kmeans++'])
ch_index_kmeans5_i = calinski_harabasz_score(psc, Kmeans_Tabel['k=5 kmeans++'])
ch_index_kmeans3_r = calinski_harabasz_score(psc, Kmeans_Tabel['k=3 random'])
ch_index_kmeans4_r = calinski_harabasz_score(psc, Kmeans_Tabel['k=4 random'])
ch_index_kmeans5_r = calinski_harabasz_score(psc, Kmeans_Tabel['k=5 random'])
ch_index_hkmeans3 = calinski_harabasz_score(psc, Hierarchical_Kmeans_Tabel['k=3 hk'])
ch_index_hkmeans4 = calinski_harabasz_score(psc, Hierarchical_Kmeans_Tabel['k=4 hk'])
ch_index_hkmeans5 = calinski_harabasz_score(psc, Hierarchical_Kmeans_Tabel['k=5 hk'])
In [74]:
print('Calinski Harabasz Index K-means 3 kmeans++:',ch_index_kmeans3_i)
print('Calinski Harabasz Index K-means 4 kmeans++:',ch_index_kmeans4_i)
print('Calinski Harabasz Index K-means 5 kmeans++:',ch_index_kmeans5_i)
print('Calinski Harabasz Index K-means 3 random:',ch_index_kmeans3_r)
print('Calinski Harabasz Index K-means 3 random:',ch_index_kmeans4_r)
print('Calinski Harabasz Index K-means 3 random:',ch_index_kmeans5_r)
print('Calinski Harabasz Index HK-means 3:',ch_index_hkmeans3)
print('Calinski Harabasz Index HK-means 4:',ch_index_hkmeans4)
print('Calinski Harabasz Index HK-means 5:',ch_index_hkmeans5)
Calinski Harabasz Index K-means 3 kmeans++: 30.27567892886422
Calinski Harabasz Index K-means 4 kmeans++: 28.756485814113994
Calinski Harabasz Index K-means 5 kmeans++: 27.523183992221334
Calinski Harabasz Index K-means 3 random: 25.64022381449592
Calinski Harabasz Index K-means 3 random: 19.29857643823079
Calinski Harabasz Index K-means 3 random: 24.12419255437759
Calinski Harabasz Index HK-means 3: 30.27567892886422
Calinski Harabasz Index HK-means 4: 24.59244988312941
Calinski Harabasz Index HK-means 5: 27.523183992221334
In [75]:
ch_index_SingleLinkage3 = calinski_harabasz_score(psc, cluster3 )
ch_index_SingleLinkage4 = calinski_harabasz_score(psc, cluster4 )
ch_index_SingleLinkage5 = calinski_harabasz_score(psc, cluster5 )
In [76]:
print('Calinski Harabasz Index Single Linkage 3:',ch_index_SingleLinkage3)
print('Calinski Harabasz Index Single Linkage 4:',ch_index_SingleLinkage4)
print('Calinski Harabasz Index Single Linkage 5:',ch_index_SingleLinkage5)
Calinski Harabasz Index Single Linkage 3: 20.493703416896846
Calinski Harabasz Index Single Linkage 4: 24.59244988312941
Calinski Harabasz Index Single Linkage 5: 22.101845709584115

6.2 Silhouette coefficient¶

In [119]:
resultsk = {}

for i in range(3,6):
    kmeansk = KMeans(n_clusters=i, init='k-means++',n_init=10,max_iter=300,random_state=42)
    labelsk = kmeansk.fit_predict(principalComponents)
    db_indexk = silhouette_score(psc, labelsk)
    resultsk.update({i: db_indexk})

resultsr = {}

for i in range(3,6):
    kmeansr = KMeans(n_clusters=i, init='random',n_init=10,max_iter=300,random_state=42)
    labelsr = kmeansr.fit_predict(principalComponents)
    db_indexr = silhouette_score(psc, labelsr)
    resultsr.update({i: db_indexr})

resultshk = {}

for i in range(1,2):
    kmeanshk3_ = KMeans(n_clusters=3, init=slc3_,n_init=10,max_iter=300,random_state=42)
    labelshk3_ = kmeanshk3_.fit_predict(principalComponents)
    db_indexhk3_ = silhouette_score(psc, labelshk3_)
    resultshk.update({3: db_indexhk3_})
    kmeanshk4_ = KMeans(n_clusters=4, init=slc4_,n_init=10,max_iter=300,random_state=42)
    labelshk4_ = kmeanshk4_.fit_predict(principalComponents)
    db_indexhk4_ = silhouette_score(psc, labelshk4_)
    resultshk.update({4: db_indexhk4_})
    kmeanshk5_ = KMeans(n_clusters=5, init=slc5_,n_init=10,max_iter=300,random_state=42)
    labelshk5_ = kmeanshk5_.fit_predict(principalComponents)
    db_indexhk5_ = silhouette_score(psc, labelshk5_)
    resultshk.update({5: db_indexhk5_})

resultssl = {}

for i in range(3,6):
    sl = AgglomerativeClustering(n_clusters=i, affinity='euclidean', linkage='single')
    labelssl = sl.fit_predict(principalComponents)
    db_indexsl = silhouette_score(psc, labelssl)
    resultssl.update({i: db_indexsl})
In [121]:
sns.set(rc={"figure.figsize":(10, 6)})
plt.plot(list(resultsk.keys()), list(resultsk.values()), label='K-Means++')
plt.plot(list(resultsr.keys()), list(resultsr.values()), label='Random')
plt.plot(list(resultshk.keys()), list(resultshk.values()), label='Hierarchical K-Means')
plt.plot(list(resultssl.keys()), list(resultssl.values()), label='Single Linkage')
plt.title('Silhouette Coefficient Score 2020-2022')
plt.xlabel("Number of clusters")
plt.ylabel("Silhouette Coefficient Index")
plt.legend()
plt.show()
In [79]:
score3_i = silhouette_score(psc, Kmeans_Tabel['k=3 kmeans++'], metric='euclidean')
score4_i = silhouette_score(psc, Kmeans_Tabel['k=4 kmeans++'], metric='euclidean')
score5_i = silhouette_score(psc, Kmeans_Tabel['k=5 kmeans++'], metric='euclidean')
score3_r = silhouette_score(psc, Kmeans_Tabel['k=3 random'], metric='euclidean')
score4_r = silhouette_score(psc, Kmeans_Tabel['k=4 random'], metric='euclidean')
score5_r = silhouette_score(psc, Kmeans_Tabel['k=5 random'], metric='euclidean')
hscore3 = silhouette_score(psc, identified_clusters_hKmeans3, metric='euclidean')
hscore4 = silhouette_score(psc, identified_clusters_hKmeans4, metric='euclidean')
hscore5 = silhouette_score(psc, identified_clusters_hKmeans5, metric='euclidean')
In [80]:
print('Silhouette coefficient K-means 3 k-means++:',score3_i)
print('Silhouette coefficient K-means 4 k-means++:',score4_i)
print('Silhouette coefficient K-means 5 k-means++:',score5_i)
print('Silhouette coefficient K-means 3 random:',score3_r)
print('Silhouette coefficient K-means 4 random:',score4_r)
print('Silhouette coefficient K-means 5 random:',score5_r)
print('Silhouette coefficient HK-means 3:',hscore3)
print('Silhouette coefficient HK-means 4:',hscore4)
print('Silhouette coefficient HK-means 5:',hscore5)
Silhouette coefficient K-means 3 k-means++: 0.6745383942607401
Silhouette coefficient K-means 4 k-means++: 0.5024000913928874
Silhouette coefficient K-means 5 k-means++: 0.4941079464595051
Silhouette coefficient K-means 3 random: 0.4991462727828761
Silhouette coefficient K-means 4 random: 0.2957044656981662
Silhouette coefficient K-means 5 random: 0.3073022491918449
Silhouette coefficient HK-means 3: 0.6745383942607401
Silhouette coefficient HK-means 4: 0.6408219489733553
Silhouette coefficient HK-means 5: 0.4941079464595051
In [81]:
scores3 = silhouette_score(psc, cluster3, metric='euclidean')
scores4 = silhouette_score(psc, cluster4, metric='euclidean')
scores5 = silhouette_score(psc, cluster5, metric='euclidean')
In [82]:
print('Silhouette coefficient K-means 3:',scores3)
print('Silhouette coefficient K-means 4:',scores4)
print('Silhouette coefficient K-means 5:',scores5)
Silhouette coefficient K-means 3: 0.6560560906881828
Silhouette coefficient K-means 4: 0.6408219489733553
Silhouette coefficient K-means 5: 0.47206141916418703
In [ ]:
 

Original Data¶

6. Evaluasi cluster¶

6.1 Calinski Harabasz Index¶

In [83]:
ch_index_kmeans3_i = calinski_harabasz_score(pb, Kmeans_Tabel['k=3 kmeans++'])
ch_index_kmeans4_i = calinski_harabasz_score(pb, Kmeans_Tabel['k=4 kmeans++'])
ch_index_kmeans5_i = calinski_harabasz_score(pb, Kmeans_Tabel['k=5 kmeans++'])
ch_index_kmeans3_r = calinski_harabasz_score(pb, Kmeans_Tabel['k=3 random'])
ch_index_kmeans4_r = calinski_harabasz_score(pb, Kmeans_Tabel['k=4 random'])
ch_index_kmeans5_r = calinski_harabasz_score(pb, Kmeans_Tabel['k=5 random'])
ch_index_hkmeans3 = calinski_harabasz_score(pb, Hierarchical_Kmeans_Tabel['k=3 hk'])
ch_index_hkmeans4 = calinski_harabasz_score(pb, Hierarchical_Kmeans_Tabel['k=4 hk'])
ch_index_hkmeans5 = calinski_harabasz_score(pb, Hierarchical_Kmeans_Tabel['k=5 hk'])
In [84]:
print('Calinski Harabasz Index K-means 3 kmeans++:',ch_index_kmeans3_i)
print('Calinski Harabasz Index K-means 4 kmeans++:',ch_index_kmeans4_i)
print('Calinski Harabasz Index K-means 5 kmeans++:',ch_index_kmeans5_i)
print('Calinski Harabasz Index K-means 3 random:',ch_index_kmeans3_r)
print('Calinski Harabasz Index K-means 3 random:',ch_index_kmeans4_r)
print('Calinski Harabasz Index K-means 3 random:',ch_index_kmeans5_r)
print('Calinski Harabasz Index HK-means 3:',ch_index_hkmeans3)
print('Calinski Harabasz Index HK-means 4:',ch_index_hkmeans4)
print('Calinski Harabasz Index HK-means 5:',ch_index_hkmeans5)
Calinski Harabasz Index K-means 3 kmeans++: 57.982645118375565
Calinski Harabasz Index K-means 4 kmeans++: 49.69512635670209
Calinski Harabasz Index K-means 5 kmeans++: 41.93869884583579
Calinski Harabasz Index K-means 3 random: 29.186360201480685
Calinski Harabasz Index K-means 3 random: 16.58936536079578
Calinski Harabasz Index K-means 3 random: 39.53415387813175
Calinski Harabasz Index HK-means 3: 57.98264511837555
Calinski Harabasz Index HK-means 4: 42.40873814090654
Calinski Harabasz Index HK-means 5: 41.93869884583579
In [85]:
ch_index_SingleLinkage3 = calinski_harabasz_score(psc, cluster3 )
ch_index_SingleLinkage4 = calinski_harabasz_score(psc, cluster4 )
ch_index_SingleLinkage5 = calinski_harabasz_score(psc, cluster5 )
In [86]:
print('Calinski Harabasz Index Single Linkage 3:',ch_index_SingleLinkage3)
print('Calinski Harabasz Index Single Linkage 4:',ch_index_SingleLinkage4)
print('Calinski Harabasz Index Single Linkage 5:',ch_index_SingleLinkage5)
Calinski Harabasz Index Single Linkage 3: 20.493703416896846
Calinski Harabasz Index Single Linkage 4: 24.59244988312941
Calinski Harabasz Index Single Linkage 5: 22.101845709584115

6.2 Silhouette coefficient¶

In [87]:
score3_i = silhouette_score(pb, Kmeans_Tabel['k=3 kmeans++'], metric='euclidean')
score4_i = silhouette_score(pb, Kmeans_Tabel['k=4 kmeans++'], metric='euclidean')
score5_i = silhouette_score(pb, Kmeans_Tabel['k=5 kmeans++'], metric='euclidean')
score3_r = silhouette_score(pb, Kmeans_Tabel['k=3 random'], metric='euclidean')
score4_r = silhouette_score(pb, Kmeans_Tabel['k=4 random'], metric='euclidean')
score5_r = silhouette_score(pb, Kmeans_Tabel['k=5 random'], metric='euclidean')
hscore3 = silhouette_score(pb, identified_clusters_hKmeans3, metric='euclidean')
hscore4 = silhouette_score(pb, identified_clusters_hKmeans4, metric='euclidean')
hscore5 = silhouette_score(pb, identified_clusters_hKmeans5, metric='euclidean')
In [88]:
print('Silhouette coefficient K-means 3 k-means++:',score3_i)
print('Silhouette coefficient K-means 4 k-means++:',score4_i)
print('Silhouette coefficient K-means 5 k-means++:',score5_i)
print('Silhouette coefficient K-means 3 random:',score3_r)
print('Silhouette coefficient K-means 4 random:',score4_r)
print('Silhouette coefficient K-means 5 random:',score5_r)
print('Silhouette coefficient HK-means 3:',hscore3)
print('Silhouette coefficient HK-means 4:',hscore4)
print('Silhouette coefficient HK-means 5:',hscore5)
Silhouette coefficient K-means 3 k-means++: 0.726621062844469
Silhouette coefficient K-means 4 k-means++: 0.5201142248204489
Silhouette coefficient K-means 5 k-means++: 0.4986093315415186
Silhouette coefficient K-means 3 random: 0.5105054377850797
Silhouette coefficient K-means 4 random: 0.23293788852406308
Silhouette coefficient K-means 5 random: 0.25895015082520845
Silhouette coefficient HK-means 3: 0.726621062844469
Silhouette coefficient HK-means 4: 0.6751365567576909
Silhouette coefficient HK-means 5: 0.4986093315415186
In [89]:
scores3 = silhouette_score(pb, cluster3, metric='euclidean')
scores4 = silhouette_score(pb, cluster4, metric='euclidean')
scores5 = silhouette_score(pb, cluster5, metric='euclidean')
In [90]:
print('Silhouette coefficient K-means 3:',scores3)
print('Silhouette coefficient K-means 4:',scores4)
print('Silhouette coefficient K-means 5:',scores5)
Silhouette coefficient K-means 3: 0.6340872807029864
Silhouette coefficient K-means 4: 0.6751365567576909
Silhouette coefficient K-means 5: 0.12120922846162763

6.3 Visualisasi¶

In [91]:
class Radar(object):
    def __init__(self, figure, title, labels, rect=None):
        if rect is None:
            rect = [0.05, 0.05, 0.9, 0.9]

        self.n = len(title)
        self.angles = np.arange(0, 360, 360.0/self.n)
        
        self.axes = [figure.add_axes(rect, projection='polar', label='axes%d' % i) for i in range(self.n)]
        self.ax = self.axes[0]
        self.ax.set_thetagrids(self.angles, labels=title, fontsize=14, backgroundcolor="white",zorder=999) # Feature names
        self.ax.set_yticklabels([])
        
        for ax in self.axes[1:]:
            ax.xaxis.set_visible(False)
            ax.set_yticklabels([])
            ax.set_zorder(-99)

        for ax, angle, label in zip(self.axes, self.angles, labels):
            ax.spines['polar'].set_color('black')
            ax.spines['polar'].set_zorder(-99)
                     
    def plot(self, values, *args, **kw):
        angle = np.deg2rad(np.r_[self.angles, self.angles[0]])
        values = np.r_[values, values[0]]
        self.ax.plot(angle, values, *args, **kw)
        kw['label'] = '_noLabel'
        self.ax.fill(angle, values,*args,**kw)
In [92]:
# Let's define max. 6 different cluster colors - if you like you can add here more.
cluster_colors = ['#b4d2b1', '#568f8b', '#1d4a60', '#cd7e59', '#ddb247', '#d15252']

6.3.1 K-Means++¶

In [93]:
X_mean3_i = pd.concat([pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']), 
                   ProduksiBuah20_22_copy.groupby('k=3 kmeans++').mean().T], axis=1)
X_dev_rel3_i = X_mean3_i.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_dev_rel3_i.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_mean3_i.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)

X_std_mean3_i = pd.concat([pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']), 
                   ProduksiBuah20_22_scaled_cluster.groupby('k=3 kmeans++').mean().T], axis=1)

X_std_dev_rel3_i = X_std_mean3_i.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_std_dev_rel3_i.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_std_mean3_i.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
In [94]:
fig = plt.figure(figsize=(8, 8))
no_features = len(kmeans3.feature_names_in_)
radar = Radar(fig, kmeans3.feature_names_in_, np.unique(kmeans3.labels_))

for k in range(0,kmeans3.n_clusters):
    cluster_data = X_std_mean3_i[k].values.tolist()
    radar.plot(cluster_data, '-', lw=2, color=cluster_colors[k], alpha=0.7, label='cluster {}'.format(k))

radar.ax.legend()
radar.ax.set_title("Cluster 3 kmeans++: Feature means per cluster", size=22, pad=60)
plt.show()
In [95]:
X_mean4_i = pd.concat([pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']), 
                   ProduksiBuah20_22_copy.groupby('k=4 kmeans++').mean().T], axis=1)
X_dev_rel4_i = X_mean4_i.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_dev_rel4_i.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_mean4_i.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)

X_std_mean4_i = pd.concat([pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']), 
                   ProduksiBuah20_22_scaled_cluster.groupby('k=4 kmeans++').mean().T], axis=1)

X_std_dev_rel4_i = X_std_mean4_i.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_std_dev_rel4_i.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_std_mean4_i.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
In [96]:
fig = plt.figure(figsize=(8, 8))
no_features = len(kmeans4.feature_names_in_)
radar = Radar(fig, kmeans4.feature_names_in_, np.unique(kmeans3.labels_))

for k in range(0,kmeans4.n_clusters):
    cluster_data = X_std_mean4_i[k].values.tolist()
    radar.plot(cluster_data, '-', lw=2, color=cluster_colors[k], alpha=0.7, label='cluster {}'.format(k))

radar.ax.legend()
radar.ax.set_title("Cluster 4 kmeans++: Feature means per cluster", size=22, pad=60)
plt.show()
In [97]:
X_mean5_i = pd.concat([pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']), 
                   ProduksiBuah20_22_copy.groupby('k=5 kmeans++').mean().T], axis=1)
X_dev_rel5_i = X_mean5_i.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_dev_rel5_i.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_mean5_i.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)

X_std_mean5_i = pd.concat([pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']), 
                   ProduksiBuah20_22_scaled_cluster.groupby('k=5 kmeans++').mean().T], axis=1)

X_std_dev_rel5_i = X_std_mean5_i.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_std_dev_rel5_i.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_std_mean5_i.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
In [98]:
fig = plt.figure(figsize=(8, 8))
no_features = len(kmeans5.feature_names_in_)
radar = Radar(fig, kmeans5.feature_names_in_, np.unique(kmeans5.labels_))

for k in range(0,kmeans5.n_clusters):
    cluster_data = X_std_mean5_i[k].values.tolist()
    radar.plot(cluster_data, '-', lw=2, color=cluster_colors[k], alpha=0.7, label='cluster {}'.format(k))

radar.ax.legend()
radar.ax.set_title("Cluster 5 kmeans++: Feature means per cluster", size=22, pad=60)
plt.show()

6.3.2 Random¶

In [99]:
X_mean3_r = pd.concat([pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']), 
                   ProduksiBuah20_22_copy.groupby('k=3 random').mean().T], axis=1)
X_dev_rel3_r = X_mean3_r.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_dev_rel3_r.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_mean3_r.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)

X_std_mean3_r = pd.concat([pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']), 
                   ProduksiBuah20_22_scaled_cluster.groupby('k=3 random').mean().T], axis=1)

X_std_dev_rel3_r = X_std_mean3_r.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_std_dev_rel3_r.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_std_mean3_r.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
In [100]:
fig = plt.figure(figsize=(8, 8))
no_features = len(kmeans3.feature_names_in_)
radar = Radar(fig, kmeans3.feature_names_in_, np.unique(kmeans3.labels_))

for k in range(0,kmeans3.n_clusters):
    cluster_data = X_std_mean3_r[k].values.tolist()
    radar.plot(cluster_data, '-', lw=2, color=cluster_colors[k], alpha=0.7, label='cluster {}'.format(k))

radar.ax.legend()
radar.ax.set_title("Cluster 3 random: Feature means per cluster", size=22, pad=60)
plt.show()
In [101]:
X_mean4_r = pd.concat([pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']), 
                   ProduksiBuah20_22_copy.groupby('k=4 random').mean().T], axis=1)
X_dev_rel4_r = X_mean4_r.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_dev_rel4_r.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_mean4_r.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)

X_std_mean4_r = pd.concat([pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']), 
                   ProduksiBuah20_22_scaled_cluster.groupby('k=4 random').mean().T], axis=1)

X_std_dev_rel4_r = X_std_mean4_r.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_std_dev_rel4_r.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_std_mean4_r.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
In [102]:
fig = plt.figure(figsize=(8, 8))
no_features = len(kmeans4.feature_names_in_)
radar = Radar(fig, kmeans4.feature_names_in_, np.unique(kmeans4.labels_))

for k in range(0,kmeans4.n_clusters):
    cluster_data = X_std_mean4_r[k].values.tolist()
    radar.plot(cluster_data, '-', lw=2, color=cluster_colors[k], alpha=0.7, label='cluster {}'.format(k))

radar.ax.legend()
radar.ax.set_title("Cluster 4 random: Feature means per cluster", size=22, pad=60)
plt.show()
In [103]:
X_mean5_r = pd.concat([pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']), 
                   ProduksiBuah20_22_copy.groupby('k=5 random').mean().T], axis=1)
X_dev_rel5_r = X_mean5_r.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_dev_rel5_r.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_mean5_r.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)

X_std_mean5_r = pd.concat([pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']), 
                   ProduksiBuah20_22_scaled_cluster.groupby('k=5 random').mean().T], axis=1)

X_std_dev_rel5_r = X_std_mean5_r.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_std_dev_rel5_r.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_std_mean5_r.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
In [104]:
fig = plt.figure(figsize=(8, 8))
no_features = len(kmeans5.feature_names_in_)
radar = Radar(fig, kmeans5.feature_names_in_, np.unique(kmeans5.labels_))

for k in range(0,kmeans5.n_clusters):
    cluster_data = X_std_mean5_r[k].values.tolist()
    radar.plot(cluster_data, '-', lw=2, color=cluster_colors[k], alpha=0.7, label='cluster {}'.format(k))

radar.ax.legend()
radar.ax.set_title("Cluster 5 random: Feature means per cluster", size=22, pad=60)
plt.show()

6.3.3 Hierarchical KMeans Clustering¶

In [105]:
X_mean3_hk = pd.concat([pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']), 
                   ProduksiBuah20_22_copy.groupby('k=3 hk').mean().T], axis=1)
X_dev_rel3_hk = X_mean3_hk.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_dev_rel3_hk.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_mean3_hk.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)

X_std_mean3_hk = pd.concat([pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']), 
                   ProduksiBuah20_22_scaled_cluster.groupby('k=3 hk').mean().T], axis=1)

X_std_dev_rel3_hk = X_std_mean3_hk.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_std_dev_rel3_hk.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_std_mean3_hk.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
In [106]:
fig = plt.figure(figsize=(8, 8))
no_features = len(kmeans3.feature_names_in_)
radar = Radar(fig, kmeans3.feature_names_in_, np.unique(kmeans3.labels_))

for k in range(0,kmeans3.n_clusters):
    cluster_data = X_std_mean3_hk[k].values.tolist()
    radar.plot(cluster_data, '-', lw=2, color=cluster_colors[k], alpha=0.7, label='cluster {}'.format(k))

radar.ax.legend()
radar.ax.set_title("Cluster 3 hierarchical kmeans: Feature means per cluster", size=22, pad=60)
plt.show()
In [107]:
X_mean4_hk = pd.concat([pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']), 
                   ProduksiBuah20_22_copy.groupby('k=4 hk').mean().T], axis=1)
X_dev_rel4_hk = X_mean4_hk.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_dev_rel4_hk.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_mean4_hk.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)

X_std_mean4_hk = pd.concat([pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']), 
                   ProduksiBuah20_22_scaled_cluster.groupby('k=4 hk').mean().T], axis=1)

X_std_dev_rel4_hk = X_std_mean4_hk.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_std_dev_rel4_hk.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_std_mean4_hk.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
In [108]:
fig = plt.figure(figsize=(8, 8))
no_features = len(kmeans4.feature_names_in_)
radar = Radar(fig, kmeans4.feature_names_in_, np.unique(kmeans4.labels_))

for k in range(0,kmeans4.n_clusters):
    cluster_data = X_std_mean4_hk[k].values.tolist()
    radar.plot(cluster_data, '-', lw=2, color=cluster_colors[k], alpha=0.7, label='cluster {}'.format(k))

radar.ax.legend()
radar.ax.set_title("Cluster 4 hierarchical kmeans: Feature means per cluster", size=22, pad=60)
plt.show()
In [109]:
X_mean5_hk = pd.concat([pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']), 
                   ProduksiBuah20_22_copy.groupby('k=5 hk').mean().T], axis=1)
X_dev_rel5_hk = X_mean5_hk.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_dev_rel5_hk.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_mean5_hk.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)

X_std_mean5_hk = pd.concat([pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']), 
                   ProduksiBuah20_22_scaled_cluster.groupby('k=5 hk').mean().T], axis=1)

X_std_dev_rel5_hk = X_std_mean5_hk.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_std_dev_rel5_hk.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_std_mean5_hk.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
In [110]:
fig = plt.figure(figsize=(8, 8))
no_features = len(kmeans5.feature_names_in_)
radar = Radar(fig, kmeans5.feature_names_in_, np.unique(kmeans5.labels_))

for k in range(0,kmeans5.n_clusters):
    cluster_data = X_std_mean5_hk[k].values.tolist()
    radar.plot(cluster_data, '-', lw=2, color=cluster_colors[k], alpha=0.7, label='cluster {}'.format(k))

radar.ax.legend()
radar.ax.set_title("Cluster 5 hierarchical kmeans: Feature means per cluster", size=22, pad=60)
plt.show()

6.3.4 Single Linkage¶

In [111]:
X_mean3_sl = pd.concat([pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']), 
                   ProduksiBuah20_22_copy.groupby('k=3').mean().T], axis=1)
X_dev_rel3_sl = X_mean3_sl.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_dev_rel3_sl.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_mean3_sl.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)

X_std_mean3_sl = pd.concat([pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']), 
                   ProduksiBuah20_22_scaled_cluster.groupby('k=3').mean().T], axis=1)

X_std_dev_rel3_sl = X_std_mean3_sl.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_std_dev_rel3_sl.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_std_mean3_sl.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
In [112]:
fig = plt.figure(figsize=(8, 8))
no_features = len(kmeans3.feature_names_in_)
radar = Radar(fig, kmeans3.feature_names_in_, np.unique(kmeans3.labels_))

for k in range(0,kmeans3.n_clusters):
    cluster_data = X_std_mean3_sl[k].values.tolist()
    radar.plot(cluster_data, '-', lw=2, color=cluster_colors[k], alpha=0.7, label='cluster {}'.format(k))

radar.ax.legend()
radar.ax.set_title("Cluster 3 Single Linkage: Feature means per cluster", size=22, pad=60)
plt.show()
In [113]:
X_mean4_sl = pd.concat([pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']), 
                   ProduksiBuah20_22_copy.groupby('k=4').mean().T], axis=1)
X_dev_rel4_sl = X_mean4_sl.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_dev_rel4_sl.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_mean4_sl.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)

X_std_mean4_sl = pd.concat([pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']), 
                   ProduksiBuah20_22_scaled_cluster.groupby('k=4').mean().T], axis=1)

X_std_dev_rel4_sl = X_std_mean4_sl.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_std_dev_rel4_sl.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_std_mean4_sl.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
In [114]:
fig = plt.figure(figsize=(8, 8))
no_features = len(kmeans4.feature_names_in_)
radar = Radar(fig, kmeans4.feature_names_in_, np.unique(kmeans4.labels_))

for k in range(0,kmeans4.n_clusters):
    cluster_data = X_std_mean4_sl[k].values.tolist()
    radar.plot(cluster_data, '-', lw=2, color=cluster_colors[k], alpha=0.7, label='cluster {}'.format(k))

radar.ax.legend()
radar.ax.set_title("Cluster 4 Single Linkage: Feature means per cluster", size=22, pad=60)
plt.show()
In [115]:
X_mean5_sl = pd.concat([pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']), 
                   ProduksiBuah20_22_copy.groupby('k=5').mean().T], axis=1)
X_dev_rel5_sl = X_mean5_sl.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_dev_rel5_sl.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_mean5_sl.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)

X_std_mean5_sl = pd.concat([pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']), 
                   ProduksiBuah20_22_scaled_cluster.groupby('k=5').mean().T], axis=1)

X_std_dev_rel5_sl = X_std_mean5_sl.apply(lambda x: round((x-x['mean'])/x['mean'],2)*100, axis = 1)
X_std_dev_rel5_sl.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
X_std_mean5_sl.drop(columns=['mean'],index=['k=3 kmeans++','k=4 kmeans++','k=5 kmeans++','k=3 random','k=4 random','k=5 random','k=3','k=4','k=5','k=3 hk','k=4 hk','k=5 hk'], inplace=True)
In [116]:
fig = plt.figure(figsize=(8, 8))
no_features = len(kmeans5.feature_names_in_)
radar = Radar(fig, kmeans5.feature_names_in_, np.unique(kmeans5.labels_))

for k in range(0,kmeans5.n_clusters):
    cluster_data = X_std_mean5_sl[k].values.tolist()
    radar.plot(cluster_data, '-', lw=2, color=cluster_colors[k], alpha=0.7, label='cluster {}'.format(k))

radar.ax.legend()
radar.ax.set_title("Cluster 5 Single Linkage: Feature means per cluster", size=22, pad=60)
plt.show()
In [ ]:
 
In [ ]:
 
In [ ]: